# Load the 2023 crime records from the working directory and preview the
# first six rows.
CRIME <- read.csv(file = "crime23.csv")
head(CRIME, n = 6L)
## category persistent_id date lat long street_id
## 1 anti-social-behaviour 2023-01 51.88306 0.909136 2153366
## 2 anti-social-behaviour 2023-01 51.90124 0.901681 2153173
## 3 anti-social-behaviour 2023-01 51.88907 0.897722 2153077
## 4 anti-social-behaviour 2023-01 51.89122 0.901988 2153186
## 5 anti-social-behaviour 2023-01 51.89416 0.895433 2153012
## 6 anti-social-behaviour 2023-01 51.88050 0.909014 2153379
## street_name context id location_type
## 1 On or near Military Road NA 107596596 Force
## 2 On or near NA 107596646 Force
## 3 On or near Culver Street West NA 107595950 Force
## 4 On or near Ryegate Road NA 107595953 Force
## 5 On or near Market Close NA 107595979 Force
## 6 On or near Lisle Road NA 107595985 Force
## location_subtype outcome_status
## 1 <NA>
## 2 <NA>
## 3 <NA>
## 4 <NA>
## 5 <NA>
## 6 <NA>
# Quick structural overview of the raw crime data: column names, dimensions
# (rows, columns) and the storage type of every column.
colnames(CRIME) # Variable Names
## [1] "category" "persistent_id" "date" "lat"
## [5] "long" "street_id" "street_name" "context"
## [9] "id" "location_type" "location_subtype" "outcome_status"
c(nrow(CRIME), ncol(CRIME)) # Checking dimensions of dataset
## [1] 6878 12
str(object = CRIME)
## 'data.frame': 6878 obs. of 12 variables:
## $ category : chr "anti-social-behaviour" "anti-social-behaviour" "anti-social-behaviour" "anti-social-behaviour" ...
## $ persistent_id : chr "" "" "" "" ...
## $ date : chr "2023-01" "2023-01" "2023-01" "2023-01" ...
## $ lat : num 51.9 51.9 51.9 51.9 51.9 ...
## $ long : num 0.909 0.902 0.898 0.902 0.895 ...
## $ street_id : int 2153366 2153173 2153077 2153186 2153012 2153379 2153105 2153541 2152937 2153107 ...
## $ street_name : chr "On or near Military Road" "On or near " "On or near Culver Street West" "On or near Ryegate Road" ...
## $ context : logi NA NA NA NA NA NA ...
## $ id : int 107596596 107596646 107595950 107595953 107595979 107595985 107596603 107596291 107596305 107596453 ...
## $ location_type : chr "Force" "Force" "Force" "Force" ...
## $ location_subtype: chr "" "" "" "" ...
## $ outcome_status : chr NA NA NA NA ...
# No attach() at this point: every statement in the cleaning section below
# addresses columns explicitly as CRIME$<name>, and attaching the raw data
# frame here would only leave a stale, pre-cleaning copy of its columns on
# the search path.
# Share of missing (NA) entries in each column of CRIME
(missing_values <- vapply(
  CRIME,
  function(column) sum(is.na(column)),
  numeric(1)
) / nrow(CRIME))
## category persistent_id date lat
## 0.00000000 0.00000000 0.00000000 0.00000000
## long street_id street_name context
## 0.00000000 0.00000000 0.00000000 1.00000000
## id location_type location_subtype outcome_status
## 0.00000000 0.00000000 0.00000000 0.09842978
# Name of the column with the largest share of missing values
# (the first one, if several tie)
(variable_name <- names(which.max(missing_values)))
## [1] "context"
# Drop the column identified above as having the most missing values
CRIME <- CRIME[, !(colnames(CRIME) %in% variable_name)]
# Missing outcomes become the explicit label "Unknown"
CRIME$outcome_status <- replace(
  CRIME$outcome_status,
  is.na(CRIME$outcome_status),
  "Unknown"
)
# The raw date is a "YYYY-MM" string: strip any square brackets, pin the day
# to the 1st of the month and convert the result to Date
CRIME$date <- as.Date(paste0(gsub("\\[|\\]", "", CRIME$date), "-01"))
# Integer codes for the two categorical columns, stored in new columns.
# The codes are the positions of the sorted factor levels, so they are
# labels only and carry no meaningful order or distance.
CRIME$outcome_status_numeric <- as.numeric(factor(CRIME$outcome_status))
CRIME$category_numeric <- as.numeric(factor(CRIME$category))
colnames(CRIME) # Cleaned and pre-processed dataset, used for the visualisations
## [1] "category" "persistent_id" "date"
## [4] "lat" "long" "street_id"
## [7] "street_name" "id" "location_type"
## [10] "location_subtype" "outcome_status" "outcome_status_numeric"
## [13] "category_numeric"
# Preview the first six rows of the cleaned data
head(CRIME, n = 6L)
## category persistent_id date lat long street_id
## 1 anti-social-behaviour 2023-01-01 51.88306 0.909136 2153366
## 2 anti-social-behaviour 2023-01-01 51.90124 0.901681 2153173
## 3 anti-social-behaviour 2023-01-01 51.88907 0.897722 2153077
## 4 anti-social-behaviour 2023-01-01 51.89122 0.901988 2153186
## 5 anti-social-behaviour 2023-01-01 51.89416 0.895433 2153012
## 6 anti-social-behaviour 2023-01-01 51.88050 0.909014 2153379
## street_name id location_type location_subtype
## 1 On or near Military Road 107596596 Force
## 2 On or near 107596646 Force
## 3 On or near Culver Street West 107595950 Force
## 4 On or near Ryegate Road 107595953 Force
## 5 On or near Market Close 107595979 Force
## 6 On or near Lisle Road 107595985 Force
## outcome_status outcome_status_numeric category_numeric
## 1 Unknown 14 1
## 2 Unknown 14 1
## 3 Unknown 14 1
## 4 Unknown 14 1
## 5 Unknown 14 1
## 6 Unknown 14 1
# Per-column summary statistics of the cleaned crime data
summary(object = CRIME)
## category persistent_id date lat
## Length:6878 Length:6878 Min. :2023-01-01 Min. :51.88
## Class :character Class :character 1st Qu.:2023-04-01 1st Qu.:51.89
## Mode :character Mode :character Median :2023-07-01 Median :51.89
## Mean :2023-06-17 Mean :51.89
## 3rd Qu.:2023-09-01 3rd Qu.:51.89
## Max. :2023-12-01 Max. :51.90
## long street_id street_name id
## Min. :0.8793 Min. :2152702 Length:6878 Min. :107582824
## 1st Qu.:0.8964 1st Qu.:2153025 Class :character 1st Qu.:109309182
## Median :0.9014 Median :2153158 Mode :character Median :111497486
## Mean :0.9030 Mean :2153877 Mean :111301793
## 3rd Qu.:0.9088 3rd Qu.:2153365 3rd Qu.:113746477
## Max. :0.9246 Max. :2343256 Max. :115699577
## location_type location_subtype outcome_status
## Length:6878 Length:6878 Length:6878
## Class :character Class :character Class :character
## Mode :character Mode :character Mode :character
##
##
##
## outcome_status_numeric category_numeric
## Min. : 1.000 Min. : 1.000
## 1st Qu.: 7.000 1st Qu.: 5.000
## Median : 8.000 Median :11.000
## Mean : 9.198 Mean : 9.398
## 3rd Qu.:12.000 3rd Qu.:14.000
## Max. :14.000 Max. :14.000
# Put the columns of the cleaned CRIME data frame on the search path so that
# bare column names resolve in later statements.
# NOTE(review): attach() exposes a snapshot of the data frame; changes made
# to CRIME after this call are not visible through the bare names. Prefer
# CRIME$<column> or with(CRIME, ...) and drop this call once no statement
# depends on bare column names any more.
attach(CRIME)
## The following objects are masked from CRIME (pos = 3):
##
## category, date, id, lat, location_subtype, location_type, long,
## outcome_status, persistent_id, street_id, street_name
# Display the two-way table between crime category and location type.
# with() evaluates the column names inside CRIME itself, so the table is
# always built from the current data frame (not from an attached copy) and
# still carries the "category" / "location_type" dimension labels.
(CRIME_Two_Way_TABLE <- with(CRIME, table(category, location_type)))
## location_type
## category BTP Force
## anti-social-behaviour 0 677
## bicycle-theft 4 231
## burglary 0 225
## criminal-damage-arson 1 580
## drugs 0 208
## other-crime 0 92
## other-theft 4 487
## possession-of-weapons 0 74
## public-order 6 526
## robbery 0 94
## shoplifting 0 554
## theft-from-the-person 0 76
## vehicle-crime 1 405
## violent-crime 8 2625
library(ggplot2)
library(plotly)
## Warning: package 'plotly' was built under R version 4.3.3
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
# Bar chart of the number of recorded crimes per month (one bar per value of
# `date`), converted to an interactive plotly widget.
CRIME_Bar_Plot <- ggplot(data = CRIME, mapping = aes(x = date)) +
  geom_bar(colour = "black", fill = scales::alpha("cyan", 0.7)) +
  ggtitle("Monthly Crime Rate") +
  xlab("Month") +
  ylab("Crime Count") +
  theme_minimal()
ggplotly(p = CRIME_Bar_Plot)
# Interactive bar chart of the number of crimes in each category, one rainbow
# colour per bar.
# NOTE: despite its name, CRIME_Box_Plot holds a bar chart; the name is kept
# so that any other reference to it keeps working.
CRIME_Box_Plot <- ggplotly(
  ggplot(CRIME, aes(x = category)) +
    # Count the categories in CRIME itself rather than in an attached copy,
    # so the number of colours always matches the number of bars drawn.
    geom_bar(fill = rainbow(length(unique(CRIME$category)))) +
    theme_minimal() +
    # Rotate the long category names so they do not overlap
    theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1)) +
    labs(
      title = "Crime Categories Variation",
      x = "Crime Categories",
      y = "Crime Count"
    )
)
CRIME_Box_Plot
# Kernel density estimate of the integer-coded outcome status.
# The column is read from CRIME directly instead of relying on attach(), so
# the estimate always reflects the current data frame.
# NOTE(review): outcome_status_numeric is built with as.numeric(factor(...)),
# i.e. the codes are arbitrary label positions; the shape of this curve
# therefore depends on how the outcome labels happen to sort.
Density_OS <- density(CRIME$outcome_status_numeric)
# Draw the estimated density as a single blue line
CRIME_Density_Plot <- plot_ly() %>%
  add_trace(
    x = Density_OS$x, y = Density_OS$y,
    type = "scatter", mode = "lines",
    line = list(color = "blue"), name = "Density Plot"
  ) %>%
  layout(
    title = "Density Plot For Outcome Status of Crime",
    xaxis = list(title = "Outcome Status of the Crime"),
    yaxis = list(title = "Density")
  )
CRIME_Density_Plot
# Crime locations plotted by longitude (x) and latitude (y); each point is
# coloured by its location_type value.
CRIME_Scatter_Plot <- ggplot(
  data = CRIME,
  mapping = aes(x = long, y = lat, colour = location_type)
) +
  geom_point() +
  ggtitle("Scatter Plot of Crime Police Locations") +
  xlab("Longitude") +
  ylab("Latitude") +
  labs(colour = "Crime Police Locations")
ggplotly(p = CRIME_Scatter_Plot)
library(corrplot)
## Warning: package 'corrplot' was built under R version 4.3.3
## corrplot 0.92 loaded
library(heatmaply)
## Warning: package 'heatmaply' was built under R version 4.3.3
## Loading required package: viridis
## Warning: package 'viridis' was built under R version 4.3.3
## Loading required package: viridisLite
##
## ======================
## Welcome to heatmaply version 1.5.0
##
## Type citation('heatmaply') for how to cite the package.
## Type ?heatmaply for the main documentation.
##
## The github page is: https://github.com/talgalili/heatmaply/
## Please submit your suggestions and bug-reports at: https://github.com/talgalili/heatmaply/issues
## You may ask questions at stackoverflow, use the r and heatmaply tags:
## https://stackoverflow.com/questions/tagged/heatmaply
## ======================
# Pearson correlation matrix of the two integer-coded categorical columns and
# the street identifier (printed as well as stored).
(CRIME_Cor_matrix <- cor(
  x = CRIME[c("category_numeric", "outcome_status_numeric", "street_id")],
  method = "pearson"
))
## category_numeric outcome_status_numeric street_id
## category_numeric 1.000000000 -0.12300438 -0.002875317
## outcome_status_numeric -0.123004378 1.00000000 0.025296258
## street_id -0.002875317 0.02529626 1.000000000
# Interactive heat map of the crime correlation matrix.
# NOTE(review): `layout_kwargs` does not appear among heatmaply's documented
# arguments — confirm that it has any effect on the widget size.
heatmaply(
  x = CRIME_Cor_matrix,
  main = "Correlation Matrix Featuring Crime category and Outcome Status With Respect to Street",
  fontsize_row = 7,
  fontsize_col = 7,
  layout_kwargs = list(width = 600, height = 400)
)
# Time series of monthly crime counts, one line per crime category.
# The previous call mapped only x and colour, so no frequency was ever
# computed for the "Frequency" axis. Count the records per month and
# category first, then plot those counts.
CRIME_Monthly_Counts <- as.data.frame(
  table(date = CRIME$date, category = CRIME$category),
  responseName = "count",
  stringsAsFactors = FALSE
)
# table() keeps the months as text labels; convert them back to Date so the
# x axis is a real time axis.
CRIME_Monthly_Counts$date <- as.Date(CRIME_Monthly_Counts$date)
# Keep the points of every line in chronological order.
CRIME_Monthly_Counts <- CRIME_Monthly_Counts[
  order(CRIME_Monthly_Counts$category, CRIME_Monthly_Counts$date),
]

# One colour per category. Passing the palette through `colors` makes plotly
# use it for the colour mapping instead of falling back to the 8-colour
# "Set2" palette (which produced "n too large" warnings).
CRIME_Category_Colours <- c(
  "#1f77b4", "#ff7f0e", "#2ca02c", "#d62728", "#9467bd", "#8c564b", "#e377c2",
  "#7f7f7f", "#bcbd22", "#17becf", "#aec7e8", "#ffbb78", "#98df8a", "#ff9896"
)

CRIME_Time_Series_Plot <- plot_ly(
  data = CRIME_Monthly_Counts,
  x = ~date,
  y = ~count,
  color = ~category,
  colors = CRIME_Category_Colours,
  type = "scatter",
  mode = "lines"
) %>%
  layout(
    title = "Time Series Plot of Crime Categories Over Time",
    xaxis = list(title = "Date"),
    yaxis = list(title = "Frequency")
  )
CRIME_Time_Series_Plot
library(leaflet)
## Warning: package 'leaflet' was built under R version 4.3.3
# Interactive map: default base tiles plus one marker per crime record, with
# the crime category shown in the marker's popup.
CRIME_Leaflet_Plot <- addMarkers(
  map = addTiles(map = leaflet(data = CRIME)),
  lng = ~long,
  lat = ~lat,
  popup = ~as.character(CRIME$category)
)
CRIME_Leaflet_Plot
# Load the 2023 daily weather records from the working directory and preview
# the first six rows.
TEMPERATURE <- read.csv(file = "temp2023.csv")
head(TEMPERATURE, n = 6L)
## station_ID Date TemperatureCAvg TemperatureCMax TemperatureCMin TdAvgC
## 1 3590 12/31/2023 8.7 10.6 4.4 7.2
## 2 3590 12/30/2023 6.6 9.7 4.4 4.2
## 3 3590 12/29/2023 9.9 11.4 6.9 6.0
## 4 3590 12/28/2023 9.9 11.5 4.0 7.5
## 5 3590 12/27/2023 5.8 10.6 3.9 3.7
## 6 3590 12/26/2023 9.8 12.7 6.3 7.6
## HrAvg WindkmhDir WindkmhInt WindkmhGust PresslevHp Precmm TotClOct lowClOct
## 1 89.6 S 25.0 63.0 999.0 6.2 8.0 8.0
## 2 85.5 WSW 22.7 50.0 1006.9 0.4 4.6 6.5
## 3 77.2 SW 32.8 61.2 1003.6 0.8 6.5 6.7
## 4 84.6 SSW 32.2 70.4 1003.2 2.8 6.8 7.1
## 5 86.4 SW 13.2 37.1 1016.4 2.0 4.0 6.9
## 6 86.9 WSW 23.5 46.3 1006.2 4.4 6.5 7.4
## SunD1h VisKm PreselevHp SnowDepcm
## 1 0.0 26.3 NA NA
## 2 1.1 48.3 NA NA
## 3 0.1 26.7 NA NA
## 4 0.0 25.1 NA NA
## 5 3.2 30.1 NA NA
## 6 0.0 45.8 NA NA
# Quick structural overview of the raw weather data: column names, dimensions
# (rows, columns) and the storage type of every column.
colnames(TEMPERATURE)
## [1] "station_ID" "Date" "TemperatureCAvg" "TemperatureCMax"
## [5] "TemperatureCMin" "TdAvgC" "HrAvg" "WindkmhDir"
## [9] "WindkmhInt" "WindkmhGust" "PresslevHp" "Precmm"
## [13] "TotClOct" "lowClOct" "SunD1h" "VisKm"
## [17] "PreselevHp" "SnowDepcm"
c(nrow(TEMPERATURE), ncol(TEMPERATURE))
## [1] 365 18
str(object = TEMPERATURE)
## 'data.frame': 365 obs. of 18 variables:
## $ station_ID : int 3590 3590 3590 3590 3590 3590 3590 3590 3590 3590 ...
## $ Date : chr "12/31/2023" "12/30/2023" "12/29/2023" "12/28/2023" ...
## $ TemperatureCAvg: num 8.7 6.6 9.9 9.9 5.8 9.8 12.5 10 9.6 10 ...
## $ TemperatureCMax: num 10.6 9.7 11.4 11.5 10.6 12.7 14.3 12 10.8 12.6 ...
## $ TemperatureCMin: num 4.4 4.4 6.9 4 3.9 6.3 9.5 8.4 8.1 8.1 ...
## $ TdAvgC : num 7.2 4.2 6 7.5 3.7 7.6 10.1 7 6.5 6.2 ...
## $ HrAvg : num 89.6 85.5 77.2 84.6 86.4 86.9 85.3 81.5 81.2 78.2 ...
## $ WindkmhDir : chr "S" "WSW" "SW" "SSW" ...
## $ WindkmhInt : num 25 22.7 32.8 32.2 13.2 23.5 34.1 32.7 34.1 37.5 ...
## $ WindkmhGust : num 63 50 61.2 70.4 37.1 46.3 72.3 61.2 68.6 77.8 ...
## $ PresslevHp : num 999 1007 1004 1003 1016 ...
## $ Precmm : num 6.2 0.4 0.8 2.8 2 4.4 0.8 0.8 0 2 ...
## $ TotClOct : num 8 4.6 6.5 6.8 4 6.5 7.8 5 8 7.5 ...
## $ lowClOct : num 8 6.5 6.7 7.1 6.9 7.4 7.8 6.7 8 7.5 ...
## $ SunD1h : num 0 1.1 0.1 0 3.2 0 0 2.9 0 1.4 ...
## $ VisKm : num 26.3 48.3 26.7 25.1 30.1 45.8 61.8 72.9 69.4 34.3 ...
## $ PreselevHp : logi NA NA NA NA NA NA ...
## $ SnowDepcm : int NA NA NA NA NA NA NA NA NA NA ...
# Share of missing (NA) entries in each weather column
(missing_values <- vapply(
  TEMPERATURE,
  function(column) sum(is.na(column)),
  numeric(1)
) / nrow(TEMPERATURE))
## station_ID Date TemperatureCAvg TemperatureCMax TemperatureCMin
## 0.00000000 0.00000000 0.00000000 0.00000000 0.00000000
## TdAvgC HrAvg WindkmhDir WindkmhInt WindkmhGust
## 0.00000000 0.00000000 0.00000000 0.00000000 0.00000000
## PresslevHp Precmm TotClOct lowClOct SunD1h
## 0.00000000 0.07397260 0.00000000 0.03561644 0.22465753
## VisKm PreselevHp SnowDepcm
## 0.00000000 1.00000000 0.99726027
# Names of the two columns with the largest share of missing values
(variable_name <- names(head(sort(missing_values, decreasing = TRUE), n = 2L)))
## [1] "PreselevHp" "SnowDepcm"
# Drop those two columns and list what remains
TEMPERATURE <- TEMPERATURE[, !(colnames(TEMPERATURE) %in% variable_name)]
colnames(TEMPERATURE)
## [1] "station_ID" "Date" "TemperatureCAvg" "TemperatureCMax"
## [5] "TemperatureCMin" "TdAvgC" "HrAvg" "WindkmhDir"
## [9] "WindkmhInt" "WindkmhGust" "PresslevHp" "Precmm"
## [13] "TotClOct" "lowClOct" "SunD1h" "VisKm"
# Pre-processing: replace the remaining NA entries of the three partially
# observed measurements with 0.
# NOTE(review): after this step an imputed 0 cannot be told apart from a
# genuine zero reading; SunD1h in particular had about 22% missing values, so
# its mean and correlations are affected — confirm that 0 is the intended
# fill value.
TEMPERATURE[c("Precmm", "lowClOct", "SunD1h")] <- lapply(
  TEMPERATURE[c("Precmm", "lowClOct", "SunD1h")],
  function(column) replace(column, is.na(column), 0)
)
# Re-compute the share of missing values to confirm that none are left
(missing_values <- vapply(
  TEMPERATURE,
  function(column) sum(is.na(column)),
  numeric(1)
) / nrow(TEMPERATURE))
## station_ID Date TemperatureCAvg TemperatureCMax TemperatureCMin
## 0 0 0 0 0
## TdAvgC HrAvg WindkmhDir WindkmhInt WindkmhGust
## 0 0 0 0 0
## PresslevHp Precmm TotClOct lowClOct SunD1h
## 0 0 0 0 0
## VisKm
## 0
# Parse the month/day/year text in the Date column into Date values, then
# preview the result.
TEMPERATURE[["Date"]] <- as.Date(TEMPERATURE[["Date"]], format = "%m/%d/%Y")
head(TEMPERATURE, n = 6L)
## station_ID Date TemperatureCAvg TemperatureCMax TemperatureCMin TdAvgC
## 1 3590 2023-12-31 8.7 10.6 4.4 7.2
## 2 3590 2023-12-30 6.6 9.7 4.4 4.2
## 3 3590 2023-12-29 9.9 11.4 6.9 6.0
## 4 3590 2023-12-28 9.9 11.5 4.0 7.5
## 5 3590 2023-12-27 5.8 10.6 3.9 3.7
## 6 3590 2023-12-26 9.8 12.7 6.3 7.6
## HrAvg WindkmhDir WindkmhInt WindkmhGust PresslevHp Precmm TotClOct lowClOct
## 1 89.6 S 25.0 63.0 999.0 6.2 8.0 8.0
## 2 85.5 WSW 22.7 50.0 1006.9 0.4 4.6 6.5
## 3 77.2 SW 32.8 61.2 1003.6 0.8 6.5 6.7
## 4 84.6 SSW 32.2 70.4 1003.2 2.8 6.8 7.1
## 5 86.4 SW 13.2 37.1 1016.4 2.0 4.0 6.9
## 6 86.9 WSW 23.5 46.3 1006.2 4.4 6.5 7.4
## SunD1h VisKm
## 1 0.0 26.3
## 2 1.1 48.3
## 3 0.1 26.7
## 4 0.0 25.1
## 5 3.2 30.1
## 6 0.0 45.8
# Per-column summary statistics of the cleaned weather data
summary(object = TEMPERATURE)
## station_ID Date TemperatureCAvg TemperatureCMax
## Min. :3590 Min. :2023-01-01 Min. :-2.60 Min. : 1.70
## 1st Qu.:3590 1st Qu.:2023-04-02 1st Qu.: 7.20 1st Qu.:10.60
## Median :3590 Median :2023-07-02 Median :10.40 Median :14.20
## Mean :3590 Mean :2023-07-02 Mean :10.92 Mean :15.13
## 3rd Qu.:3590 3rd Qu.:2023-10-01 3rd Qu.:15.80 3rd Qu.:20.00
## Max. :3590 Max. :2023-12-31 Max. :23.10 Max. :30.40
## TemperatureCMin TdAvgC HrAvg WindkmhDir
## Min. :-6.200 Min. :-4.400 Min. :43.10 Length:365
## 1st Qu.: 3.200 1st Qu.: 4.400 1st Qu.:75.60 Class :character
## Median : 6.300 Median : 7.600 Median :81.70 Mode :character
## Mean : 6.365 Mean : 7.578 Mean :81.25
## 3rd Qu.:10.600 3rd Qu.:11.200 3rd Qu.:87.90
## Max. :16.300 Max. :17.500 Max. :97.90
## WindkmhInt WindkmhGust PresslevHp Precmm
## Min. : 6.20 Min. :13.00 Min. : 967.4 Min. : 0.000
## 1st Qu.:12.00 1st Qu.:31.50 1st Qu.:1006.3 1st Qu.: 0.000
## Median :16.10 Median :38.90 Median :1014.3 Median : 0.000
## Mean :16.81 Mean :40.87 Mean :1013.6 Mean : 1.728
## 3rd Qu.:20.20 3rd Qu.:48.20 3rd Qu.:1021.7 3rd Qu.: 0.800
## Max. :37.50 Max. :98.20 Max. :1045.1 Max. :33.600
## TotClOct lowClOct SunD1h VisKm
## Min. :0.000 Min. :0.000 Min. : 0.000 Min. : 3.60
## 1st Qu.:3.600 1st Qu.:5.500 1st Qu.: 0.000 1st Qu.:22.70
## Median :5.100 Median :6.700 Median : 2.800 Median :31.50
## Mean :4.988 Mean :6.214 Mean : 3.975 Mean :32.11
## 3rd Qu.:7.000 3rd Qu.:7.400 3rd Qu.: 6.900 3rd Qu.:41.50
## Max. :8.000 Max. :8.000 Max. :15.400 Max. :72.90
# Put the columns of TEMPERATURE on the search path so that bare column
# names resolve in later statements.
# NOTE(review): attach() exposes a snapshot of the data frame; columns added
# or changed in TEMPERATURE after this call are not visible through the bare
# names. Prefer TEMPERATURE$<column>, with(TEMPERATURE, ...) or `data =` plus
# formulas, and drop this call once nothing depends on bare column names.
attach(TEMPERATURE)
# Display the two-way table between station ID and wind direction.
# with() evaluates the column names inside TEMPERATURE itself, so the table
# does not depend on attach() and still carries the "station_ID" /
# "WindkmhDir" dimension labels.
(TEMP_Two_Way_TABLE <- with(TEMPERATURE, table(station_ID, WindkmhDir)))
## WindkmhDir
## station_ID E ENE ESE N NE NNE NNW NW S SE SSE SSW SW W WNW WSW
## 3590 10 20 11 18 22 15 13 15 26 6 14 41 49 34 13 58
# Interactive bar chart of how many days fall into each wind-direction value.
TEMP_Bar_Plot <- ggplot(data = TEMPERATURE, mapping = aes(x = WindkmhDir)) +
  geom_bar(colour = "black", fill = "magenta") +
  ggtitle("Variation in Wind Direction") +
  xlab("Wind Direction") +
  ylab("Count") +
  theme_minimal()
TEMP_Bar_Plot <- ggplotly(p = TEMP_Bar_Plot)
TEMP_Bar_Plot
# Density-normalised histogram of the daily average temperature.
# The column is supplied through `data =` and a formula, like the other
# plotly charts in this script, so the plot does not depend on attach().
TEMP_Histogram_Plot <- plot_ly(
  data = TEMPERATURE,
  x = ~TemperatureCAvg,
  type = "histogram",
  histnorm = "probability density",
  marker = list(color = "blue", line = list(color = "black", width = 1)),
  opacity = 0.7
) %>%
  layout(
    title = "Histogram Plot For Average Temperature",
    xaxis = list(title = "Average Temperature in Degree Celsius"),
    yaxis = list(title = "Density")
  )
TEMP_Histogram_Plot
# Extracting month names from the dates for better visualisation
TEMPERATURE$Month <- format(as.Date(TEMPERATURE$Date), "%B")
# Month names in calendar order (taken from the data sorted by date); without
# this, the text labels would be arranged alphabetically on the axis.
TEMP_Month_Order <- unique(TEMPERATURE$Month[order(TEMPERATURE$Date)])
# Horizontal box plot of wind speed for every month.
# The x values are WindkmhInt, a numeric column (presumably the mean wind
# speed in km/h — confirm against the data dictionary). The previous version
# plotted WindkmhDir, the character wind-direction column, which cannot
# produce the wind-speed box plot that the title describes.
TEMP_Box_Plot <- plot_ly(
  data = TEMPERATURE,
  x = ~WindkmhInt,
  y = ~Month,
  type = "box",
  orientation = "h"
) %>%
  layout(
    title = "Boxplot of Wind Speed (KmH) With Respect To Months",
    xaxis = list(title = "WindSpeed"),
    yaxis = list(
      title = "Month",
      categoryorder = "array",
      # Categories are laid out bottom-to-top, so reverse the order to read
      # January at the top and December at the bottom.
      categoryarray = rev(TEMP_Month_Order)
    )
  )
TEMP_Box_Plot
# Daily sunshine duration by month; each point is coloured by that day's
# maximum temperature.
TEMP_Scatter_Plot <- plot_ly(
  data = TEMPERATURE,
  x = ~Month,
  y = ~SunD1h,
  color = ~TemperatureCMax,
  type = "scatter",
  mode = "markers"
) %>%
  layout(
    title = "Scatter Plot of Sunshine Duration",
    xaxis = list(
      title = "Month",
      # Month is a text column, so fix the axis to calendar order (derived
      # from the data sorted by date) instead of the default arrangement.
      categoryorder = "array",
      categoryarray = unique(TEMPERATURE$Month[order(TEMPERATURE$Date)])
    ),
    yaxis = list(title = "Sunshine Duration in Hours"),
    # NOTE(review): colour is mapped to a numeric column, so this discrete
    # colorway presumably has no visible effect — confirm and remove.
    colorway =
      c("#1f77b4", "#ff7f0e", "#2ca02c", "#d62728", "#9467bd", "#8c564b", "#e377c2", "#7f7f7f", "#bcbd22", "#17becf")
  )
TEMP_Scatter_Plot
# Pearson correlation matrix of seven numeric weather measurements (printed
# as well as stored).
(TEMPERATURE_Cor_matrix <- cor(
  x = TEMPERATURE[c(
    "TemperatureCAvg", "TdAvgC", "HrAvg", "WindkmhInt",
    "Precmm", "TotClOct", "SunD1h"
  )],
  method = "pearson"
))
## TemperatureCAvg TdAvgC HrAvg WindkmhInt Precmm
## TemperatureCAvg 1.00000000 0.94629915 -0.52666527 -0.03172960 0.08921821
## TdAvgC 0.94629915 1.00000000 -0.22746978 -0.02068238 0.19480397
## HrAvg -0.52666527 -0.22746978 1.00000000 0.01832991 0.25528080
## WindkmhInt -0.03172960 -0.02068238 0.01832991 1.00000000 0.11304716
## Precmm 0.08921821 0.19480397 0.25528080 0.11304716 1.00000000
## TotClOct -0.10987103 0.04579580 0.42819783 0.25223264 0.26992368
## SunD1h 0.49991612 0.30118466 -0.70223706 -0.23065525 -0.19022493
## TotClOct SunD1h
## TemperatureCAvg -0.1098710 0.4999161
## TdAvgC 0.0457958 0.3011847
## HrAvg 0.4281978 -0.7022371
## WindkmhInt 0.2522326 -0.2306552
## Precmm 0.2699237 -0.1902249
## TotClOct 1.0000000 -0.6489947
## SunD1h -0.6489947 1.0000000
# Interactive heat map of the weather correlation matrix; both axes are
# labelled with the matrix's column names.
TEMPERATURE_Cor_Matrix_Plot <- layout(
  p = plot_ly(
    z = TEMPERATURE_Cor_matrix,
    x = colnames(TEMPERATURE_Cor_matrix),
    y = colnames(TEMPERATURE_Cor_matrix),
    type = "heatmap",
    colorscale = "Viridis"
  ),
  title = "Correlation Matrix For Meteorological Parameters",
  xaxis = list(title = "Meteorological Parameters"),
  yaxis = list(title = "Meteorological Parameters")
)
TEMPERATURE_Cor_Matrix_Plot
# Line chart of daily total cloudiness across the year.
# TEMPERATURE$Date already holds Date values at this point (see the Date
# summary printed earlier), so it is used as is without re-converting.
TEMP_Time_Series_Plot <- plot_ly(
  data = TEMPERATURE,
  x = ~Date,
  y = ~TotClOct,
  type = "scatter",
  mode = "lines",
  name = "Total Cloudiness",
  # plotly applies `smoothing` only to spline-shaped lines, so the shape has
  # to be set explicitly for the smoothing value to take effect.
  line = list(shape = "spline", smoothing = 0.9)
) %>%
  layout(
    title = "Time Series Plot For Total Cloudiness over the Year",
    xaxis = list(title = "Months"),
    yaxis = list(title = "Total Cloudiness")
  )
TEMP_Time_Series_Plot
# Smoothed view of total cloudiness over the year.
# Step 1: average TotClOct per calendar date.
TEMP_Smoothed_Cloudiness <- summarise(
  group_by(mutate(TEMPERATURE, Date = as.Date(Date)), Date),
  TotClOct = mean(TotClOct)
)
# Step 2: fit a smoothing spline to the daily means (smooth.spline() is given
# only the values, so their position in the date-ordered table serves as x)
# and keep the fitted values.
TEMP_Smoothed_Cloudiness <- mutate(
  TEMP_Smoothed_Cloudiness,
  Smoothed_TotClOct = smooth.spline(TotClOct)$y
)
# Step 3: plot the fitted values against the date. From here on the variable
# holds the ggplot object rather than the data.
TEMP_Smoothed_Cloudiness <- ggplot(
  data = TEMP_Smoothed_Cloudiness,
  mapping = aes(x = Date, y = Smoothed_TotClOct)
) +
  geom_line(colour = "blue") +
  ggtitle("Variation of Total Cloudiness Over the Entire Year") +
  xlab("Months") +
  ylab("Total Cloudiness")
ggplotly(p = TEMP_Smoothed_Cloudiness)